<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <title>tf namespace | Taskflow QuickStart</title>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Source+Sans+Pro:400,400i,600,600i%7CSource+Code+Pro:400,400i,600" />
  <link rel="stylesheet" href="m-dark+documentation.compiled.css" />
  <link rel="icon" href="favicon.ico" type="image/x-icon" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <meta name="theme-color" content="#22272e" />
</head>
<body>
<header><nav id="navigation">
  <div class="m-container">
    <div class="m-row">
      <span id="m-navbar-brand" class="m-col-t-8 m-col-m-none m-left-m">
        <a href="https://taskflow.github.io"><img src="taskflow_logo.png" alt="" />Taskflow</a> <span class="m-breadcrumb">|</span> <a href="index.html" class="m-thin">QuickStart</a>
      </span>
      <div class="m-col-t-4 m-hide-m m-text-right m-nopadr">
        <a href="#search" class="m-doc-search-icon" title="Search" onclick="return showSearch()"><svg style="height: 0.9rem;" viewBox="0 0 16 16">
          <path id="m-doc-search-icon-path" d="m6 0c-3.31 0-6 2.69-6 6 0 3.31 2.69 6 6 6 1.49 0 2.85-0.541 3.89-1.44-0.0164 0.338 0.147 0.759 0.5 1.15l3.22 3.79c0.552 0.614 1.45 0.665 2 0.115 0.55-0.55 0.499-1.45-0.115-2l-3.79-3.22c-0.392-0.353-0.812-0.515-1.15-0.5 0.895-1.05 1.44-2.41 1.44-3.89 0-3.31-2.69-6-6-6zm0 1.56a4.44 4.44 0 0 1 4.44 4.44 4.44 4.44 0 0 1-4.44 4.44 4.44 4.44 0 0 1-4.44-4.44 4.44 4.44 0 0 1 4.44-4.44z"/>
        </svg></a>
        <a id="m-navbar-show" href="#navigation" title="Show navigation"></a>
        <a id="m-navbar-hide" href="#" title="Hide navigation"></a>
      </div>
      <div id="m-navbar-collapse" class="m-col-t-12 m-show-m m-col-m-none m-right-m">
        <div class="m-row">
          <ol class="m-col-t-6 m-col-m-none">
            <li><a href="pages.html">Handbook</a></li>
            <li><a href="namespaces.html">Namespaces</a></li>
          </ol>
          <ol class="m-col-t-6 m-col-m-none" start="3">
            <li><a href="annotated.html">Classes</a></li>
            <li><a href="files.html">Files</a></li>
            <li class="m-show-m"><a href="#search" class="m-doc-search-icon" title="Search" onclick="return showSearch()"><svg style="height: 0.9rem;" viewBox="0 0 16 16">
              <use href="#m-doc-search-icon-path" />
            </svg></a></li>
          </ol>
        </div>
      </div>
    </div>
  </div>
</nav></header>
<main><article>
  <div class="m-container m-container-inflatable">
    <div class="m-row">
      <div class="m-col-l-10 m-push-l-1">
        <h1>
          tf <span class="m-thin">namespace</span>
        </h1>
        <p>taskflow namespace</p>
        <nav class="m-block m-default">
          <h3>Contents</h3>
          <ul>
            <li>
              Reference
              <ul>
                <li><a href="#nested-classes">Classes</a></li>
                <li><a href="#enum-members">Enums</a></li>
                <li><a href="#typedef-members">Typedefs</a></li>
                <li><a href="#func-members">Functions</a></li>
                <li><a href="#var-members">Variables</a></li>
              </ul>
            </li>
          </ul>
        </nav>
        <section id="nested-classes">
          <h2><a href="#nested-classes">Classes</a></h2>
          <dl class="m-doc">
            <dt>
              class <a href="classtf_1_1AsyncTask.html" class="m-doc">AsyncTask</a>
            </dt>
            <dd>class to create a dependent asynchronous task</dd>
            <dt>
              class <a href="classtf_1_1ChromeObserver.html" class="m-doc">ChromeObserver</a>
            </dt>
            <dd>class to create an observer based on Chrome tracing format</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              class <a href="classtf_1_1cudaDeviceAllocator.html" class="m-doc">cudaDeviceAllocator</a>
            </dt>
            <dd>class to create a CUDA device allocator</dd>
            <dt>
              class <a href="classtf_1_1cudaEvent.html" class="m-doc">cudaEvent</a>
            </dt>
            <dd>class to create an RAII-styled wrapper over a native CUDA event</dd>
            <dt>
              <div class="m-doc-template">template&lt;unsigned NT, unsigned VT&gt;</div>
              class <a href="classtf_1_1cudaExecutionPolicy.html" class="m-doc">cudaExecutionPolicy</a>
            </dt>
            <dd>class to define execution policy for CUDA standard algorithms</dd>
            <dt>
              class <a href="classtf_1_1cudaFlow.html" class="m-doc">cudaFlow</a>
            </dt>
            <dd>class to create a cudaFlow task dependency graph</dd>
            <dt>
              class <a href="classtf_1_1cudaFlowCapturer.html" class="m-doc">cudaFlowCapturer</a>
            </dt>
            <dd>class to create a cudaFlow graph using stream capture</dd>
            <dt>
              class <a href="classtf_1_1cudaFlowLinearOptimizer.html" class="m-doc">cudaFlowLinearOptimizer</a>
            </dt>
            <dd>class to capture a linear CUDA graph using a sequential stream</dd>
            <dt>
              class <a href="classtf_1_1cudaFlowRoundRobinOptimizer.html" class="m-doc">cudaFlowRoundRobinOptimizer</a>
            </dt>
            <dd>class to capture a CUDA graph using a round-robin algorithm</dd>
            <dt>
              class <a href="classtf_1_1cudaFlowSequentialOptimizer.html" class="m-doc">cudaFlowSequentialOptimizer</a>
            </dt>
            <dd>class to capture a CUDA graph using a sequential stream</dd>
            <dt>
              class <a href="classtf_1_1cudaScopedDevice.html" class="m-doc">cudaScopedDevice</a>
            </dt>
            <dd>class to create an RAII-styled context switch</dd>
            <dt>
              class <a href="classtf_1_1cudaStream.html" class="m-doc">cudaStream</a>
            </dt>
            <dd>class to create an RAII-styled wrapper over a native CUDA stream</dd>
            <dt>
              class <a href="classtf_1_1cudaTask.html" class="m-doc">cudaTask</a>
            </dt>
            <dd>class to create a task handle over an internal node of a cudaFlow graph</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              class <a href="classtf_1_1cudaUSMAllocator.html" class="m-doc">cudaUSMAllocator</a>
            </dt>
            <dd>class to create a unified shared memory (USM) allocator</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename Input, typename Output, typename C&gt;</div>
              class <a href="classtf_1_1DataPipe.html" class="m-doc">DataPipe</a>
            </dt>
            <dd>class to create a stage in a data-parallel pipeline</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename... Ps&gt;</div>
              class <a href="classtf_1_1DataPipeline.html" class="m-doc">DataPipeline</a>
            </dt>
            <dd>class to create a data-parallel pipeline scheduling framework</dd>
            <dt>
              struct <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>
            </dt>
            <dd>default closure wrapper that simply runs the given closure as is</dd>
            <dt>
              struct <a href="structtf_1_1DefaultTaskParams.html" class="m-doc">DefaultTaskParams</a>
            </dt>
            <dd>empty task parameter type for compile-time optimization</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>&gt;</div>
              class <a href="classtf_1_1DynamicPartitioner.html" class="m-doc">DynamicPartitioner</a>
            </dt>
            <dd>class to construct a dynamic partitioner for scheduling parallel algorithms</dd>
            <dt>
              class <a href="classtf_1_1FlowBuilder.html" class="m-doc">FlowBuilder</a>
            </dt>
            <dd>class to build a task dependency graph</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              class <a href="classtf_1_1Future.html" class="m-doc">Future</a>
            </dt>
            <dd>class to access the result of an execution</dd>
            <dt>
              class <a href="classtf_1_1Graph.html" class="m-doc">Graph</a>
            </dt>
            <dd>class to create a graph object</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>&gt;</div>
              class <a href="classtf_1_1GuidedPartitioner.html" class="m-doc">GuidedPartitioner</a>
            </dt>
            <dd>class to construct a guided partitioner for scheduling parallel algorithms</dd>
            <dt>
              class <a href="classtf_1_1ObserverInterface.html" class="m-doc">ObserverInterface</a>
            </dt>
            <dd>class to derive an executor observer</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>&gt;</div>
              class <a href="classtf_1_1PartitionerBase.html" class="m-doc">PartitionerBase</a>
            </dt>
            <dd>class to derive a partitioner for scheduling parallel algorithms</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="http://en.cppreference.com/w/cpp/utility/functional/function.html" class="m-doc-external">std::<wbr />function</a>&lt;void(<a href="classtf_1_1Pipeflow.html" class="m-doc">tf::<wbr />Pipeflow</a>&amp;)&gt;&gt;</div>
              class <a href="classtf_1_1Pipe.html" class="m-doc">Pipe</a>
            </dt>
            <dd>class to create a pipe object for a pipeline stage</dd>
            <dt>
              class <a href="classtf_1_1Pipeflow.html" class="m-doc">Pipeflow</a>
            </dt>
            <dd>class to create a pipeflow object used by the pipe callable</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename... Ps&gt;</div>
              class <a href="classtf_1_1Pipeline.html" class="m-doc">Pipeline</a>
            </dt>
            <dd>class to create a pipeline scheduling framework</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>&gt;</div>
              class <a href="classtf_1_1RandomPartitioner.html" class="m-doc">RandomPartitioner</a>
            </dt>
            <dd>class to construct a random partitioner for scheduling parallel algorithms</dd>
            <dt>
              class <a href="classtf_1_1Runtime.html" class="m-doc">Runtime</a>
            </dt>
            <dd>class to include a runtime object in a task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P&gt;</div>
              class <a href="classtf_1_1ScalablePipeline.html" class="m-doc">ScalablePipeline</a>
            </dt>
            <dd>class to create a scalable pipeline object</dd>
            <dt>
              class <a href="classtf_1_1Semaphore.html" class="m-doc">Semaphore</a>
            </dt>
            <dd>class to create a semaphore object for building a concurrency constraint</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T, unsigned N = 2&gt;</div>
              class <a href="classtf_1_1SmallVector.html" class="m-doc">SmallVector</a>
            </dt>
            <dd>class to define a vector optimized for small arrays</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C = <a href="structtf_1_1DefaultClosureWrapper.html" class="m-doc">DefaultClosureWrapper</a>&gt;</div>
              class <a href="classtf_1_1StaticPartitioner.html" class="m-doc">StaticPartitioner</a>
            </dt>
            <dd>class to construct a static partitioner for scheduling parallel algorithms</dd>
            <dt>
              class <a href="classtf_1_1Subflow.html" class="m-doc">Subflow</a>
            </dt>
            <dd>class to construct a subflow graph from the execution of a dynamic task</dd>
            <dt>
              class <a href="classtf_1_1Task.html" class="m-doc">Task</a>
            </dt>
            <dd>class to create a task handle over a node in a taskflow graph</dd>
            <dt>
              class <a href="classtf_1_1Taskflow.html" class="m-doc">Taskflow</a>
            </dt>
            <dd>class to create a taskflow object</dd>
            <dt>
              struct <a href="structtf_1_1TaskParams.html" class="m-doc">TaskParams</a>
            </dt>
            <dd>task parameters to use when creating an asynchronous task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T, unsigned TF_MAX_PRIORITY = static_cast&lt;unsigned&gt;(<a href="namespacetf.html#ac9f4add8f716ed323b0bdbbc1d89346fa26a4b44a837bf97b972628509912b4a5" class="m-doc">TaskPriority::<wbr />MAX</a>)&gt;</div>
              class <a href="classtf_1_1TaskQueue.html" class="m-doc">TaskQueue</a>
            </dt>
            <dd>class to create a lock-free unbounded single-producer multiple-consumer queue</dd>
            <dt>
              class <a href="classtf_1_1TaskView.html" class="m-doc">TaskView</a>
            </dt>
            <dd>class to access task information from the observer interface</dd>
            <dt>
              class <a href="classtf_1_1TFProfObserver.html" class="m-doc">TFProfObserver</a>
            </dt>
            <dd>class to create an observer based on the built-in taskflow profiler format</dd>
            <dt>
              class <a href="classtf_1_1Worker.html" class="m-doc">Worker</a>
            </dt>
            <dd>class to create a worker in an executor</dd>
            <dt>
              class <a href="classtf_1_1WorkerView.html" class="m-doc">WorkerView</a>
            </dt>
            <dd>class to create an immutable view of a worker in an executor</dd>
          </dl>
        </section>
        <section id="enum-members">
          <h2><a href="#enum-members">Enums</a></h2>
          <dl class="m-doc">
            <dt>
              <span class="m-doc-wrap-bumper">enum class <a href="#ac9f4add8f716ed323b0bdbbc1d89346f" class="m-doc">TaskPriority</a>: unsigned { </span><span class="m-doc-wrap"><a href="#ac9f4add8f716ed323b0bdbbc1d89346fab89de3b4b81c4facfac906edf29aec8c" class="m-doc">HIGH</a> = 0,
              <a href="#ac9f4add8f716ed323b0bdbbc1d89346fa1e23852820b9154316c7c06e2b7ba051" class="m-doc">NORMAL</a> = 1,
              <a href="#ac9f4add8f716ed323b0bdbbc1d89346fa41bc94cbd8eebea13ce0491b2ac11b88" class="m-doc">LOW</a> = 2,
              <a href="#ac9f4add8f716ed323b0bdbbc1d89346fa26a4b44a837bf97b972628509912b4a5" class="m-doc">MAX</a> = 3 }</span>
            </dt>
            <dd>enumeration of all task priority values</dd>
            <dt>
              <span class="m-doc-wrap-bumper">enum class <a href="#a1355048578785a80414707ff308b395a" class="m-doc">TaskType</a>: int { </span><span class="m-doc-wrap"><a href="#a1355048578785a80414707ff308b395aae54e6f6ba0c7cbb4eb7a2016e2f17842" class="m-doc">PLACEHOLDER</a> = 0,
              <a href="#a1355048578785a80414707ff308b395aafe6f99ef1ec99efbdc19a9786cf1facc" class="m-doc">STATIC</a>,
              <a href="#a1355048578785a80414707ff308b395aa46be697979903d784a70aeec45eb14ad" class="m-doc">SUBFLOW</a>,
              <a href="#a1355048578785a80414707ff308b395aa9f768c0bb1c3e84ca086a85211e978ac" class="m-doc">CONDITION</a>,
              <a href="#a1355048578785a80414707ff308b395aa1cf5e1f5569acda3c4a88a91c5130a69" class="m-doc">MODULE</a>,
              <a href="#a1355048578785a80414707ff308b395aabe553330beb7b3d994656e0a4e66cd96" class="m-doc">ASYNC</a>,
              <a href="#a1355048578785a80414707ff308b395aa0db45d2a4141101bdfe48e3314cfbca3" class="m-doc">UNDEFINED</a> }</span>
            </dt>
            <dd>enumeration of all task types</dd>
            <dt id="a192f7cb0fab2eb6f1c84f6046706435d">
              <span class="m-doc-wrap-bumper">enum class <a href="#a192f7cb0fab2eb6f1c84f6046706435d" class="m-doc-self">ObserverType</a>: int { </span><span class="m-doc-wrap"><a href="#a192f7cb0fab2eb6f1c84f6046706435dac19bf39d8838d00eddb556775fa8acce" class="m-doc">TFPROF</a> = 0,
              <a href="#a192f7cb0fab2eb6f1c84f6046706435da37960509766262569d504f02a0ee986d" class="m-doc">CHROME</a>,
              <a href="#a192f7cb0fab2eb6f1c84f6046706435da0db45d2a4141101bdfe48e3314cfbca3" class="m-doc">UNDEFINED</a> }</span>
            </dt>
            <dd>enumeration of all observer types</dd>
            <dt>
              <span class="m-doc-wrap-bumper">enum class <a href="#a32d51425fa23cd0dc3518c16cf3bb6c0" class="m-doc">PartitionerType</a>: int { </span><span class="m-doc-wrap"><a href="#a32d51425fa23cd0dc3518c16cf3bb6c0afe6f99ef1ec99efbdc19a9786cf1facc" class="m-doc">STATIC</a>,
              <a href="#a32d51425fa23cd0dc3518c16cf3bb6c0a0fcc90da4811c877ba9f9c12f7d60bc9" class="m-doc">DYNAMIC</a> }</span>
            </dt>
            <dd>enumeration of all partitioner types</dd>
            <dt>
              <span class="m-doc-wrap-bumper">enum class <a href="#abb7a11e41fd457f69e7ff45d4c769564" class="m-doc">PipeType</a>: int { </span><span class="m-doc-wrap"><a href="#abb7a11e41fd457f69e7ff45d4c769564adf13a99b035d6f0bce4f44ab18eec8eb" class="m-doc">PARALLEL</a> = 1,
              <a href="#abb7a11e41fd457f69e7ff45d4c769564a7b804a28d6154ab8007287532037f1d0" class="m-doc">SERIAL</a> = 2 }</span>
            </dt>
            <dd>enumeration of all pipe types</dd>
            <dt>
              <span class="m-doc-wrap-bumper">enum class <a href="#afebc56ae6d5765010d0dd13a5f04132e" class="m-doc">cudaTaskType</a>: int { </span><span class="m-doc-wrap"><a href="#afebc56ae6d5765010d0dd13a5f04132eaba2b45bdc11e2a4a6e86aab2ac693cbb" class="m-doc">EMPTY</a> = 0,
              <a href="#afebc56ae6d5765010d0dd13a5f04132eab9361011891280a44d85b967739cc6a5" class="m-doc">HOST</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921" class="m-doc">MEMSET</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132eac5d10cc70cce96265c445f14e7f5aba4" class="m-doc">MEMCPY</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132ea35c10219c45ccfb5b07444fd7e17214c" class="m-doc">KERNEL</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132ea46be697979903d784a70aeec45eb14ad" class="m-doc">SUBFLOW</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132eab72f08e0732365cac9599b5c42157bf9" class="m-doc">CAPTURE</a>,
              <a href="#afebc56ae6d5765010d0dd13a5f04132ea0db45d2a4141101bdfe48e3314cfbca3" class="m-doc">UNDEFINED</a> }</span>
            </dt>
            <dd>enumeration of all cudaTask types</dd>
          </dl>
        </section>
        <section id="typedef-members">
          <h2><a href="#typedef-members">Typedefs</a></h2>
          <dl class="m-doc">
            <dt id="a8cff4bbd797dde4dfab096c3cc657833">
              using <a href="#a8cff4bbd797dde4dfab096c3cc657833" class="m-doc-self">observer_stamp_t</a> = <a href="http://en.cppreference.com/w/cpp/chrono/time_point.html" class="m-doc-external">std::<wbr />chrono::<wbr />time_point</a>&lt;<a href="http://en.cppreference.com/w/cpp/chrono/steady_clock.html" class="m-doc-external">std::<wbr />chrono::<wbr />steady_clock</a>&gt;
            </dt>
            <dd>default time point type of observers</dd>
            <dt>
              using <a href="#a66b72776c788898aee9e132b0ea9b405" class="m-doc">DefaultPartitioner</a> = <a href="classtf_1_1GuidedPartitioner.html" class="m-doc">GuidedPartitioner</a>&lt;&gt;
            </dt>
            <dd>default partitioner set to <a href="classtf_1_1GuidedPartitioner.html" class="m-doc">tf::<wbr />GuidedPartitioner</a></dd>
            <dt id="a0e267ab3e1baeb1962f3b3a374de9553">
              using <a href="#a0e267ab3e1baeb1962f3b3a374de9553" class="m-doc-self">cudaDefaultExecutionPolicy</a> = <a href="classtf_1_1cudaExecutionPolicy.html" class="m-doc">cudaExecutionPolicy</a>&lt;512, 7&gt;
            </dt>
            <dd>default execution policy</dd>
          </dl>
        </section>
        <section id="func-members">
          <h2><a href="#func-members">Functions</a></h2>
          <dl class="m-doc">
            <dt>
              <span class="m-doc-wrap-bumper">auto <a href="#a18c45bc96e6725943e0a4396fa59b524" class="m-doc">to_string</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#a1355048578785a80414707ff308b395a" class="m-doc">TaskType</a> type) -&gt; const char*</span>
            </dt>
            <dd>convert a task type to a human-readable string</dd>
            <dt id="ad8b1b906950270c6b7bc19e7074daa23">
              <span class="m-doc-wrap-bumper">auto <a href="#ad8b1b906950270c6b7bc19e7074daa23" class="m-doc-self">operator&lt;&lt;</a>(</span><span class="m-doc-wrap"><a href="http://en.cppreference.com/w/cpp/io/basic_ostream.html" class="m-doc-external">std::<wbr />ostream</a>&amp; os,
              const <a href="classtf_1_1Task.html" class="m-doc">Task</a>&amp; task) -&gt; <a href="http://en.cppreference.com/w/cpp/io/basic_ostream.html" class="m-doc-external">std::<wbr />ostream</a>&amp;</span>
            </dt>
            <dd>overload of ostream inserter operator for <a href="classtf_1_1Task.html" class="m-doc">Task</a></dd>
            <dt id="af62db51f0697f598e63eec0cfec7a585">
              <div class="m-doc-template">template&lt;typename I, std::enable_if_t&lt;std::is_same_v&lt;deref_t&lt;I&gt;, Semaphore&gt;, void&gt;* = nullptr&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#af62db51f0697f598e63eec0cfec7a585" class="m-doc-self">try_acquire</a>(</span><span class="m-doc-wrap">I begin,
              I end) -&gt; bool</span>
            </dt>
            <dd>tries to acquire all semaphores in the specified range</dd>
            <dt id="a642de1b981de4303e2ea61006d3d02c8">
              <div class="m-doc-template">template&lt;typename... S, std::enable_if_t&lt;all_same_v&lt;Semaphore, std::decay_t&lt;S&gt;...&gt;, void&gt;* = nullptr&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#a642de1b981de4303e2ea61006d3d02c8" class="m-doc-self">try_acquire</a>(</span><span class="m-doc-wrap">S &amp;&amp; ... semaphores) -&gt; bool</span>
            </dt>
            <dd>tries to acquire all semaphores</dd>
            <dt id="ab7ec159c370bc052effcd0cdbc48047e">
              <span class="m-doc-wrap-bumper">auto <a href="#ab7ec159c370bc052effcd0cdbc48047e" class="m-doc-self">to_string</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#a192f7cb0fab2eb6f1c84f6046706435d" class="m-doc">ObserverType</a> type) -&gt; const char*</span>
            </dt>
            <dd>convert an observer type to a human-readable string</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename Input, typename Output, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#a8975fa5762088789adb0b60f38208309" class="m-doc">make_data_pipe</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#abb7a11e41fd457f69e7ff45d4c769564" class="m-doc">PipeType</a> d,
              C&amp;&amp; callable) -&gt; auto</span>
            </dt>
            <dd>function to construct a data pipe (<a href="classtf_1_1DataPipe.html" class="m-doc">tf::<wbr />DataPipe</a>)</dd>
            <dt id="abffa70155a5f160b7ceb86ee52ab2136">
              <span class="m-doc-wrap-bumper">auto <a href="#abffa70155a5f160b7ceb86ee52ab2136" class="m-doc-self">cuda_get_num_devices</a>(</span><span class="m-doc-wrap">) -&gt; size_t</span>
            </dt>
            <dd>queries the number of available devices</dd>
            <dt id="a235f5a9ce203d538eec1f4114221d473">
              <span class="m-doc-wrap-bumper">auto <a href="#a235f5a9ce203d538eec1f4114221d473" class="m-doc-self">cuda_get_device</a>(</span><span class="m-doc-wrap">) -&gt; int</span>
            </dt>
            <dd>gets the current device associated with the caller thread</dd>
            <dt id="ade2938289fa49aafc9b2b7b090deaa22">
              <span class="m-doc-wrap-bumper">void <a href="#ade2938289fa49aafc9b2b7b090deaa22" class="m-doc-self">cuda_set_device</a>(</span><span class="m-doc-wrap">int id)</span>
            </dt>
            <dd>switches to a given device context</dd>
            <dt id="a403b679694f4c85c857163b47e84d566">
              <span class="m-doc-wrap-bumper">void <a href="#a403b679694f4c85c857163b47e84d566" class="m-doc-self">cuda_get_device_property</a>(</span><span class="m-doc-wrap">int i,
              cudaDeviceProp&amp; p)</span>
            </dt>
            <dd>obtains the device property</dd>
            <dt id="a0e82b8a929e12349240276e34ec9f8c8">
              <span class="m-doc-wrap-bumper">auto <a href="#a0e82b8a929e12349240276e34ec9f8c8" class="m-doc-self">cuda_get_device_property</a>(</span><span class="m-doc-wrap">int i) -&gt; cudaDeviceProp</span>
            </dt>
            <dd>obtains the device property</dd>
            <dt id="aff8073c78daa741df76b530a0e602287">
              <span class="m-doc-wrap-bumper">void <a href="#aff8073c78daa741df76b530a0e602287" class="m-doc-self">cuda_dump_device_property</a>(</span><span class="m-doc-wrap"><a href="http://en.cppreference.com/w/cpp/io/basic_ostream.html" class="m-doc-external">std::<wbr />ostream</a>&amp; os,
              const cudaDeviceProp&amp; p)</span>
            </dt>
            <dd>dumps the device property</dd>
            <dt id="abf813f7ac4249d1b752d1b724f970deb">
              <span class="m-doc-wrap-bumper">auto <a href="#abf813f7ac4249d1b752d1b724f970deb" class="m-doc-self">cuda_get_device_max_threads_per_block</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum threads per block on a device</dd>
            <dt id="af8184bb128c446fe383315f3dc15acf6">
              <span class="m-doc-wrap-bumper">auto <a href="#af8184bb128c446fe383315f3dc15acf6" class="m-doc-self">cuda_get_device_max_x_dim_per_block</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum x-dimension per block on a device</dd>
            <dt id="a9aba5f29135b9da29015c2a367ab1d70">
              <span class="m-doc-wrap-bumper">auto <a href="#a9aba5f29135b9da29015c2a367ab1d70" class="m-doc-self">cuda_get_device_max_y_dim_per_block</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum y-dimension per block on a device</dd>
            <dt id="a5580f59e633625b2f344bbf477d17c2f">
              <span class="m-doc-wrap-bumper">auto <a href="#a5580f59e633625b2f344bbf477d17c2f" class="m-doc-self">cuda_get_device_max_z_dim_per_block</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum z-dimension per block on a device</dd>
            <dt id="a597579c8a9ab31244418e30a5aa74491">
              <span class="m-doc-wrap-bumper">auto <a href="#a597579c8a9ab31244418e30a5aa74491" class="m-doc-self">cuda_get_device_max_x_dim_per_grid</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum x-dimension per grid on a device</dd>
            <dt id="a91d5c1609a7542949dd56d08b7c4c645">
              <span class="m-doc-wrap-bumper">auto <a href="#a91d5c1609a7542949dd56d08b7c4c645" class="m-doc-self">cuda_get_device_max_y_dim_per_grid</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum y-dimension per grid on a device</dd>
            <dt id="a0373e32a20c7fc90c4f0461ee41bb918">
              <span class="m-doc-wrap-bumper">auto <a href="#a0373e32a20c7fc90c4f0461ee41bb918" class="m-doc-self">cuda_get_device_max_z_dim_per_grid</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum z-dimension per grid on a device</dd>
            <dt id="aeca46ac171c4941a75aafddfe7546bfa">
              <span class="m-doc-wrap-bumper">auto <a href="#aeca46ac171c4941a75aafddfe7546bfa" class="m-doc-self">cuda_get_device_max_shm_per_block</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the maximum shared memory size in bytes per block on a device</dd>
            <dt id="aea1b2af1073496f047d6fb9984cff4f1">
              <span class="m-doc-wrap-bumper">auto <a href="#aea1b2af1073496f047d6fb9984cff4f1" class="m-doc-self">cuda_get_device_warp_size</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the warp size on a device</dd>
            <dt id="a1fb03793a6b8705026b80ef87599d4d5">
              <span class="m-doc-wrap-bumper">auto <a href="#a1fb03793a6b8705026b80ef87599d4d5" class="m-doc-self">cuda_get_device_compute_capability_major</a>(</span><span class="m-doc-wrap">int d) -&gt; int</span>
            </dt>
            <dd>queries the major number of compute capability of a device</dd>
            <dt id="a71f5177665f4f7e18984ccc57d625602">
              <span class="m-doc-wrap-bumper">auto <a href="#a71f5177665f4f7e18984ccc57d625602" class="m-doc-self">cuda_get_device_compute_capability_minor</a>(</span><span class="m-doc-wrap">int d) -&gt; int</span>
            </dt>
            <dd>queries the minor number of compute capability of a device</dd>
            <dt id="ad389294b4d1c14219d8d098f796e27c5">
              <span class="m-doc-wrap-bumper">auto <a href="#ad389294b4d1c14219d8d098f796e27c5" class="m-doc-self">cuda_get_device_unified_addressing</a>(</span><span class="m-doc-wrap">int d) -&gt; bool</span>
            </dt>
            <dd>queries if the device supports unified addressing</dd>
            <dt id="a43ac57f0eca3aa83c04bec3c4da9ab82">
              <span class="m-doc-wrap-bumper">auto <a href="#a43ac57f0eca3aa83c04bec3c4da9ab82" class="m-doc-self">cuda_get_driver_version</a>(</span><span class="m-doc-wrap">) -&gt; int</span>
            </dt>
            <dd>queries the latest CUDA version (1000 * major + 10 * minor) supported by the driver</dd>
            <dt id="a31258ad089c6f847c8cd636cd72d6949">
              <span class="m-doc-wrap-bumper">auto <a href="#a31258ad089c6f847c8cd636cd72d6949" class="m-doc-self">cuda_get_runtime_version</a>(</span><span class="m-doc-wrap">) -&gt; int</span>
            </dt>
            <dd>queries the CUDA Runtime version (1000 * major + 10 * minor)</dd>
            <dt id="a1effcf929b7e488925f9e12d74c8c62b">
              <span class="m-doc-wrap-bumper">auto <a href="#a1effcf929b7e488925f9e12d74c8c62b" class="m-doc-self">cuda_get_free_mem</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the free memory (expensive call)</dd>
            <dt id="a58bbc8d5d955582d6b5f7fdac51d010b">
              <span class="m-doc-wrap-bumper">auto <a href="#a58bbc8d5d955582d6b5f7fdac51d010b" class="m-doc-self">cuda_get_total_mem</a>(</span><span class="m-doc-wrap">int d) -&gt; size_t</span>
            </dt>
            <dd>queries the total available memory (expensive call)</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#a2548e58af071bf1dbbbc945c84f237c9" class="m-doc">cuda_malloc_device</a>(</span><span class="m-doc-wrap">size_t N,
              int d) -&gt; T*</span>
            </dt>
            <dd>allocates memory on the given device for holding <code>N</code> elements of type <code>T</code></dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#a76f4996669b2e81004749edbd3013d1a" class="m-doc">cuda_malloc_device</a>(</span><span class="m-doc-wrap">size_t N) -&gt; T*</span>
            </dt>
            <dd>allocates memory on the current device associated with the caller</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#ad289846c38e3f122e1315d906243fc8b" class="m-doc">cuda_malloc_shared</a>(</span><span class="m-doc-wrap">size_t N) -&gt; T*</span>
            </dt>
            <dd>allocates shared memory for holding <code>N</code> elements of type <code>T</code></dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#ac7a8fe7456b888d6072ba94783c5003c" class="m-doc">cuda_free</a>(</span><span class="m-doc-wrap">T* ptr,
              int d)</span>
            </dt>
            <dd>frees memory on the GPU device</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename T&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#ae174a3a49b91ef21554dac16806f0d72" class="m-doc">cuda_free</a>(</span><span class="m-doc-wrap">T* ptr)</span>
            </dt>
            <dd>frees memory on the GPU device</dd>
            <dt>
              <span class="m-doc-wrap-bumper">void <a href="#aa4266474b921f8ed7d9ec8071fded2a4" class="m-doc">cuda_memcpy_async</a>(</span><span class="m-doc-wrap">cudaStream_t stream,
              void* dst,
              const void* src,
              size_t count)</span>
            </dt>
            <dd>copies data between host and device asynchronously through a stream</dd>
            <dt>
              <span class="m-doc-wrap-bumper">void <a href="#a6615554d2954e895755411ee444d9760" class="m-doc">cuda_memset_async</a>(</span><span class="m-doc-wrap">cudaStream_t stream,
              void* devPtr,
              int value,
              size_t count)</span>
            </dt>
            <dd>initializes or sets GPU memory to the given value byte by byte</dd>
            <dt id="a693cfec4e770c8d66fbd920ed8a07df0">
              <span class="m-doc-wrap-bumper">auto <a href="#a693cfec4e770c8d66fbd920ed8a07df0" class="m-doc-self">to_string</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#afebc56ae6d5765010d0dd13a5f04132e" class="m-doc">cudaTaskType</a> type) -&gt; const char* <span class="m-label m-flat m-primary">constexpr</span></span>
            </dt>
            <dd>convert a cudaTask type to a human-readable string</dd>
            <dt id="a29ae31d817e4080f4030c2b311ddafe9">
              <span class="m-doc-wrap-bumper">auto <a href="#a29ae31d817e4080f4030c2b311ddafe9" class="m-doc-self">operator&lt;&lt;</a>(</span><span class="m-doc-wrap"><a href="http://en.cppreference.com/w/cpp/io/basic_ostream.html" class="m-doc-external">std::<wbr />ostream</a>&amp; os,
              const <a href="classtf_1_1cudaTask.html" class="m-doc">cudaTask</a>&amp; ct) -&gt; <a href="http://en.cppreference.com/w/cpp/io/basic_ostream.html" class="m-doc-external">std::<wbr />ostream</a>&amp;</span>
            </dt>
            <dd>overload of ostream inserter operator for <a href="classtf_1_1cudaTask.html" class="m-doc">cudaTask</a></dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a2ff1cf81426c856fc6db1f6ead47878f" class="m-doc">cuda_single_task</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              C c)</span>
            </dt>
            <dd>runs a callable asynchronously using one kernel thread</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a7c449cec0b93503b8280d05add35e9f4" class="m-doc">cuda_for_each</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              C c)</span>
            </dt>
            <dd>performs asynchronous parallel iterations over a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a01ad7ce62fa6f42f2f2fbff3659b7884" class="m-doc">cuda_for_each_index</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              I inc,
              C c)</span>
            </dt>
            <dd>performs asynchronous parallel iterations over an index-based range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a3ed764530620a419e3400e1f9ab6c956" class="m-doc">cuda_transform</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op)</span>
            </dt>
            <dd>performs asynchronous parallel transforms over a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I1, typename I2, typename O, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#abdcb5b755f7ace2aa452541d5bf93b5f" class="m-doc">cuda_transform</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I1 first1,
              I1 last1,
              I2 first2,
              O output,
              C op)</span>
            </dt>
            <dd>performs asynchronous parallel transforms over two ranges of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename T, typename O&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a8a872d2a0ac73a676713cb5be5aa688c" class="m-doc">cuda_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O op,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous parallel reduction over a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename T, typename O&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a492e8410db032a0273a99dd905486161" class="m-doc">cuda_uninitialized_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O op,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous parallel reduction over a range of items without an initial value</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename T, typename O, typename U&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a4463d06240d608bc31d8b3546a851e4e" class="m-doc">cuda_transform_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O bop,
              U uop,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous parallel reduction over a range of transformed items with an initial value</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename T, typename O, typename U&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#aa451668b7a0a3abf385cf2abebed8962" class="m-doc">cuda_uninitialized_transform_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O bop,
              U uop,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous parallel reduction over a range of transformed items without an initial value</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a2e1b44c84a09e0a8495a611cb9a7ea40" class="m-doc">cuda_inclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous inclusive scan over a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O, typename C, typename U&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#afa4aa760ddb6efbda1b9bab505ad5baf" class="m-doc">cuda_transform_inclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C bop,
              U uop,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous inclusive scan over a range of transformed items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#aeb391c40120844318fd715b8c3a716bb" class="m-doc">cuda_exclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous exclusive scan over a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O, typename C, typename U&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a2e739895c1c73538967af060ca714366" class="m-doc">cuda_transform_exclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C bop,
              U uop,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous exclusive scan over a range of transformed items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename a_keys_it, typename a_vals_it, typename b_keys_it, typename b_vals_it, typename c_keys_it, typename c_vals_it, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#aa84d4c68d2cbe9f6efc4a1eb1a115458" class="m-doc">cuda_merge_by_key</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              a_keys_it a_keys_first,
              a_keys_it a_keys_last,
              a_vals_it a_vals_first,
              b_keys_it b_keys_first,
              b_keys_it b_keys_last,
              b_vals_it b_vals_first,
              c_keys_it c_keys_first,
              c_vals_it c_vals_first,
              C comp,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous key-value merge over a range of keys and values</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename a_keys_it, typename b_keys_it, typename c_keys_it, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a37ec481149c2f01669353033d75ed72a" class="m-doc">cuda_merge</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              a_keys_it a_keys_first,
              a_keys_it a_keys_last,
              b_keys_it b_keys_first,
              b_keys_it b_keys_last,
              c_keys_it c_keys_first,
              C comp,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous key-only merge over a range of keys</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename K, typename V = cudaEmpty&gt;</div>
              <span class="m-doc-wrap-bumper">auto <a href="#a9c69906a4dfd1e2d0cd7ed496d29dafd" class="m-doc">cuda_sort_buffer_size</a>(</span><span class="m-doc-wrap">unsigned count) -&gt; unsigned</span>
            </dt>
            <dd>queries the buffer size in bytes needed to call sort kernels for the given number of elements</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename K_it, typename V_it, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a3461b9179221dd7230ce2a0e45156c7f" class="m-doc">cuda_sort_by_key</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              K_it k_first,
              K_it k_last,
              V_it v_first,
              C comp,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous key-value sort on a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename K_it, typename C&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a06804cb1598e965febc7bd35fc0fbbb0" class="m-doc">cuda_sort</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              K_it k_first,
              K_it k_last,
              C comp,
              void* buf)</span>
            </dt>
            <dd>performs asynchronous key-only sort on a range of items</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename U&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a5f9dabd7c5d0fa5166cf76d9fa5a038e" class="m-doc">cuda_find_if</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              U op)</span>
            </dt>
            <dd>finds the index of the first element that satisfies the given criteria</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a572c13198191c46765264f8afabe2e9f" class="m-doc">cuda_min_element</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              O op,
              void* buf)</span>
            </dt>
            <dd>finds the index of the minimum element in a range</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P, typename I, typename O&gt;</div>
              <span class="m-doc-wrap-bumper">void <a href="#a3fc577fd0a8f127770bcf68bc56c073e" class="m-doc">cuda_max_element</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              O op,
              void* buf)</span>
            </dt>
            <dd>finds the index of the maximum element in a range</dd>
            <dt>
              <span class="m-doc-wrap-bumper">auto <a href="#a06790e5f6898894392f247309626e1b4" class="m-doc">version</a>(</span><span class="m-doc-wrap">) -&gt; const char* <span class="m-label m-flat m-primary">constexpr</span></span>
            </dt>
            <dd>queries the version information in a string format <code>major.minor.patch</code></dd>
          </dl>
        </section>
        <section id="var-members">
          <h2><a href="#var-members">Variables</a></h2>
          <dl class="m-doc">
            <dt>
              <div class="m-doc-template">template&lt;typename P&gt;</div>
              bool <a href="#ad3a41adc2499a9519da3e77dc3e9849c" class="m-doc">is_task_params_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if the given type is a task parameter type</dd>
            <dt id="a872cf263ab68abc7c3180710fb792528">
              <a href="http://en.cppreference.com/w/cpp/container/array.html" class="m-doc-external">std::<wbr />array</a>&lt;<a href="namespacetf.html#a1355048578785a80414707ff308b395a" class="m-doc">TaskType</a>, 6&gt; <a href="#a872cf263ab68abc7c3180710fb792528" class="m-doc-self">TASK_TYPES</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>array of all task types (used for iterating task types)</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C&gt;</div>
              bool <a href="#aefeb96086f4a99f0e58a0f321012a52c" class="m-doc">is_subflow_task_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if a callable is a dynamic task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C&gt;</div>
              bool <a href="#a00ca2fc2de0e679a7d9b8039340343df" class="m-doc">is_condition_task_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if a callable is a condition task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C&gt;</div>
              bool <a href="#a78c40dc8776735b0f2c27cd446481aff" class="m-doc">is_multi_condition_task_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if a callable is a multi-condition task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename C&gt;</div>
              bool <a href="#a11fc9c98eb3a0d3a9aa55598b1f4d614" class="m-doc">is_static_task_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if a callable is a static task</dd>
            <dt>
              <div class="m-doc-template">template&lt;typename P&gt;</div>
              bool <a href="#a73c20705fc54763f195a00b6e626e301" class="m-doc">is_partitioner_v</a> <span class="m-label m-flat m-primary">constexpr</span>
            </dt>
            <dd>determines if a type is a partitioner</dd>
          </dl>
        </section>
        <section>
          <h2>Enum documentation</h2>
          <section class="m-doc-details" id="ac9f4add8f716ed323b0bdbbc1d89346f"><div>
            <h3>
              enum class tf::<wbr /><a href="#ac9f4add8f716ed323b0bdbbc1d89346f" class="m-doc-self">TaskPriority</a>: unsigned
            </h3>
            <p>enumeration of all task priority values</p>
<p>A priority is an enumerated value of type <code>unsigned</code>. Currently, Taskflow defines three priority levels, <code>HIGH</code>, <code>NORMAL</code>, and <code>LOW</code>, with values 0, 1, and 2, respectively. That is, the lower the value, the higher the priority.</p>
            <table class="m-table m-fullwidth m-flat m-doc">
              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
              <tbody>
                <tr>
                  <td><a href="#ac9f4add8f716ed323b0bdbbc1d89346fab89de3b4b81c4facfac906edf29aec8c" class="m-doc-self" id="ac9f4add8f716ed323b0bdbbc1d89346fab89de3b4b81c4facfac906edf29aec8c">HIGH</a></td>
                  <td>
                  <p>value of the highest priority (i.e., 0)<br /></p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#ac9f4add8f716ed323b0bdbbc1d89346fa1e23852820b9154316c7c06e2b7ba051" class="m-doc-self" id="ac9f4add8f716ed323b0bdbbc1d89346fa1e23852820b9154316c7c06e2b7ba051">NORMAL</a></td>
                  <td>
                  <p>value of the normal priority (i.e., 1)<br /></p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#ac9f4add8f716ed323b0bdbbc1d89346fa41bc94cbd8eebea13ce0491b2ac11b88" class="m-doc-self" id="ac9f4add8f716ed323b0bdbbc1d89346fa41bc94cbd8eebea13ce0491b2ac11b88">LOW</a></td>
                  <td>
                  <p>value of the lowest priority (i.e., 2)</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#ac9f4add8f716ed323b0bdbbc1d89346fa26a4b44a837bf97b972628509912b4a5" class="m-doc-self" id="ac9f4add8f716ed323b0bdbbc1d89346fa26a4b44a837bf97b972628509912b4a5">MAX</a></td>
                  <td>
                  <p>conventional value for iterating priority values</p>
                  </td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="a1355048578785a80414707ff308b395a"><div>
            <h3>
              enum class tf::<wbr /><a href="#a1355048578785a80414707ff308b395a" class="m-doc-self">TaskType</a>: int
            </h3>
            <p>enumeration of all task types</p>
            <table class="m-table m-fullwidth m-flat m-doc">
              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
              <tbody>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aae54e6f6ba0c7cbb4eb7a2016e2f17842" class="m-doc-self" id="a1355048578785a80414707ff308b395aae54e6f6ba0c7cbb4eb7a2016e2f17842">PLACEHOLDER</a></td>
                  <td>
                  <p>placeholder task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aafe6f99ef1ec99efbdc19a9786cf1facc" class="m-doc-self" id="a1355048578785a80414707ff308b395aafe6f99ef1ec99efbdc19a9786cf1facc">STATIC</a></td>
                  <td>
                  <p>static task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aa46be697979903d784a70aeec45eb14ad" class="m-doc-self" id="a1355048578785a80414707ff308b395aa46be697979903d784a70aeec45eb14ad">SUBFLOW</a></td>
                  <td>
                  <p>dynamic (subflow) task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aa9f768c0bb1c3e84ca086a85211e978ac" class="m-doc-self" id="a1355048578785a80414707ff308b395aa9f768c0bb1c3e84ca086a85211e978ac">CONDITION</a></td>
                  <td>
                  <p>condition task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aa1cf5e1f5569acda3c4a88a91c5130a69" class="m-doc-self" id="a1355048578785a80414707ff308b395aa1cf5e1f5569acda3c4a88a91c5130a69">MODULE</a></td>
                  <td>
                  <p>module task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aabe553330beb7b3d994656e0a4e66cd96" class="m-doc-self" id="a1355048578785a80414707ff308b395aabe553330beb7b3d994656e0a4e66cd96">ASYNC</a></td>
                  <td>
                  <p>asynchronous task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a1355048578785a80414707ff308b395aa0db45d2a4141101bdfe48e3314cfbca3" class="m-doc-self" id="a1355048578785a80414707ff308b395aa0db45d2a4141101bdfe48e3314cfbca3">UNDEFINED</a></td>
                  <td>
                  <p>undefined task type (for internal use only)</p>
                  </td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="a32d51425fa23cd0dc3518c16cf3bb6c0"><div>
            <h3>
              enum class tf::<wbr /><a href="#a32d51425fa23cd0dc3518c16cf3bb6c0" class="m-doc-self">PartitionerType</a>: int
            </h3>
            <p>enumeration of all partitioner types</p>
            <table class="m-table m-fullwidth m-flat m-doc">
              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
              <tbody>
                <tr>
                  <td><a href="#a32d51425fa23cd0dc3518c16cf3bb6c0afe6f99ef1ec99efbdc19a9786cf1facc" class="m-doc-self" id="a32d51425fa23cd0dc3518c16cf3bb6c0afe6f99ef1ec99efbdc19a9786cf1facc">STATIC</a></td>
                  <td>
                  <p>static partitioner type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#a32d51425fa23cd0dc3518c16cf3bb6c0a0fcc90da4811c877ba9f9c12f7d60bc9" class="m-doc-self" id="a32d51425fa23cd0dc3518c16cf3bb6c0a0fcc90da4811c877ba9f9c12f7d60bc9">DYNAMIC</a></td>
                  <td>
                  <p>dynamic partitioner type</p>
                  </td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="abb7a11e41fd457f69e7ff45d4c769564"><div>
            <h3>
              enum class tf::<wbr /><a href="#abb7a11e41fd457f69e7ff45d4c769564" class="m-doc-self">PipeType</a>: int
            </h3>
            <p>enumeration of all pipe types</p>
            <table class="m-table m-fullwidth m-flat m-doc">
              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
              <tbody>
                <tr>
                  <td><a href="#abb7a11e41fd457f69e7ff45d4c769564adf13a99b035d6f0bce4f44ab18eec8eb" class="m-doc-self" id="abb7a11e41fd457f69e7ff45d4c769564adf13a99b035d6f0bce4f44ab18eec8eb">PARALLEL</a></td>
                  <td>
                  <p>parallel type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#abb7a11e41fd457f69e7ff45d4c769564a7b804a28d6154ab8007287532037f1d0" class="m-doc-self" id="abb7a11e41fd457f69e7ff45d4c769564a7b804a28d6154ab8007287532037f1d0">SERIAL</a></td>
                  <td>
                  <p>serial type</p>
                  </td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="afebc56ae6d5765010d0dd13a5f04132e"><div>
            <h3>
              enum class tf::<wbr /><a href="#afebc56ae6d5765010d0dd13a5f04132e" class="m-doc-self">cudaTaskType</a>: int
            </h3>
            <p>enumeration of all cudaTask types</p>
            <table class="m-table m-fullwidth m-flat m-doc">
              <thead><tr><th style="width: 1%">Enumerators</th><th></th></tr></thead>
              <tbody>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132eaba2b45bdc11e2a4a6e86aab2ac693cbb" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132eaba2b45bdc11e2a4a6e86aab2ac693cbb">EMPTY</a></td>
                  <td>
                  <p>empty task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132eab9361011891280a44d85b967739cc6a5" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132eab9361011891280a44d85b967739cc6a5">HOST</a></td>
                  <td>
                  <p>host task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132ea41d4dbfd78ceea21abb0ecb03c3cc921">MEMSET</a></td>
                  <td>
                  <p>memory set task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132eac5d10cc70cce96265c445f14e7f5aba4" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132eac5d10cc70cce96265c445f14e7f5aba4">MEMCPY</a></td>
                  <td>
                  <p>memory copy task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132ea35c10219c45ccfb5b07444fd7e17214c" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132ea35c10219c45ccfb5b07444fd7e17214c">KERNEL</a></td>
                  <td>
                  <p>kernel task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132ea46be697979903d784a70aeec45eb14ad" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132ea46be697979903d784a70aeec45eb14ad">SUBFLOW</a></td>
                  <td>
                  <p>subflow (child graph) task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132eab72f08e0732365cac9599b5c42157bf9" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132eab72f08e0732365cac9599b5c42157bf9">CAPTURE</a></td>
                  <td>
                  <p>capture task type</p>
                  </td>
                </tr>
                <tr>
                  <td><a href="#afebc56ae6d5765010d0dd13a5f04132ea0db45d2a4141101bdfe48e3314cfbca3" class="m-doc-self" id="afebc56ae6d5765010d0dd13a5f04132ea0db45d2a4141101bdfe48e3314cfbca3">UNDEFINED</a></td>
                  <td>
                  <p>undefined task type</p>
                  </td>
                </tr>
              </tbody>
            </table>
          </div></section>
        </section>
        <section>
          <h2>Typedef documentation</h2>
          <section class="m-doc-details" id="a66b72776c788898aee9e132b0ea9b405"><div>
            <h3>
              using tf::<wbr /><a href="#a66b72776c788898aee9e132b0ea9b405" class="m-doc-self">DefaultPartitioner</a> = <a href="classtf_1_1GuidedPartitioner.html" class="m-doc">GuidedPartitioner</a>&lt;&gt;
            </h3>
            <p>default partitioner set to <a href="classtf_1_1GuidedPartitioner.html" class="m-doc">tf::<wbr />GuidedPartitioner</a></p>
<p>Guided partitioner can achieve decent performance for most parallel algorithms, especially for those with irregular and unbalanced workload per iteration.</p>
          </div></section>
        </section>
        <section>
          <h2>Function documentation</h2>
          <section class="m-doc-details" id="a18c45bc96e6725943e0a4396fa59b524"><div>
            <h3>
              <span class="m-doc-wrap-bumper">const char* tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a18c45bc96e6725943e0a4396fa59b524" class="m-doc-self">to_string</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#a1355048578785a80414707ff308b395a" class="m-doc">TaskType</a> type)</span></span>
            </h3>
            <p>convert a task type to a human-readable string</p>
<p>The name of each task type is the lower-case string of its characters.</p><pre class="m-code"><span class="n">TaskType</span><span class="o">::</span><span class="n">PLACEHOLDER</span><span class="w">     </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;placeholder&quot;</span>
<span class="n">TaskType</span><span class="o">::</span><span class="n">STATIC</span><span class="w">          </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;static&quot;</span>
<span class="n">TaskType</span><span class="o">::</span><span class="n">SUBFLOW</span><span class="w">         </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;subflow&quot;</span>
<span class="n">TaskType</span><span class="o">::</span><span class="n">CONDITION</span><span class="w">       </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;condition&quot;</span>
<span class="n">TaskType</span><span class="o">::</span><span class="n">MODULE</span><span class="w">          </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;module&quot;</span>
<span class="n">TaskType</span><span class="o">::</span><span class="n">ASYNC</span><span class="w">           </span><span class="o">-&gt;</span><span class="w">  </span><span class="s">&quot;async&quot;</span></pre>
          </div></section>
          <section class="m-doc-details" id="a8975fa5762088789adb0b60f38208309"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename Input, typename Output, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">auto tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a8975fa5762088789adb0b60f38208309" class="m-doc-self">make_data_pipe</a>(</span><span class="m-doc-wrap"><a href="namespacetf.html#abb7a11e41fd457f69e7ff45d4c769564" class="m-doc">PipeType</a> d,
              C&amp;&amp; callable)</span></span>
            </h3>
            <p>function to construct a data pipe (<a href="classtf_1_1DataPipe.html" class="m-doc">tf::<wbr />DataPipe</a>)</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">Input</td>
                  <td>input data type</td>
                </tr>
                <tr>
                  <td>Output</td>
                  <td>output data type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>callable type</td>
                </tr>
              </tbody>
            </table>
<p><a href="namespacetf.html#a8975fa5762088789adb0b60f38208309" class="m-doc">tf::<wbr />make_data_pipe</a> is a helper function to create a data pipe (<a href="classtf_1_1DataPipe.html" class="m-doc">tf::<wbr />DataPipe</a>) in a data-parallel pipeline (<a href="classtf_1_1DataPipeline.html" class="m-doc">tf::<wbr />DataPipeline</a>). The first argument specifies the direction of the data pipe, either <a href="namespacetf.html#abb7a11e41fd457f69e7ff45d4c769564a7b804a28d6154ab8007287532037f1d0" class="m-doc">tf::<wbr />PipeType::<wbr />SERIAL</a> or <a href="namespacetf.html#abb7a11e41fd457f69e7ff45d4c769564adf13a99b035d6f0bce4f44ab18eec8eb" class="m-doc">tf::<wbr />PipeType::<wbr />PARALLEL</a>, and the second argument is a callable to be invoked by the pipeline scheduler. Input and output data types are specified via template parameters, which are always decayed by the library to their original form for storage purposes. The callable must take the input data type in its first argument and return a value of the output data type.</p><pre class="m-code"><span class="n">tf</span><span class="o">::</span><span class="n">make_data_pipe</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&gt;</span><span class="p">(</span>
<span class="w">  </span><span class="n">tf</span><span class="o">::</span><span class="n">PipeType</span><span class="o">::</span><span class="n">SERIAL</span><span class="p">,</span><span class="w"> </span>
<span class="w">  </span><span class="p">[](</span><span class="kt">int</span><span class="o">&amp;</span><span class="w"> </span><span class="n">input</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">to_string</span><span class="p">(</span><span class="n">input</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">100</span><span class="p">);</span>
<span class="w">  </span><span class="p">}</span>
<span class="p">);</span></pre><p>The callable can additionally take a reference of <a href="classtf_1_1Pipeflow.html" class="m-doc">tf::<wbr />Pipeflow</a>, which allows you to query the runtime information of a stage task, such as its line number and token number.</p><pre class="m-code"><span class="n">tf</span><span class="o">::</span><span class="n">make_data_pipe</span><span class="o">&lt;</span><span class="kt">int</span><span class="p">,</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">string</span><span class="o">&gt;</span><span class="p">(</span>
<span class="w">  </span><span class="n">tf</span><span class="o">::</span><span class="n">PipeType</span><span class="o">::</span><span class="n">SERIAL</span><span class="p">,</span><span class="w"> </span>
<span class="w">  </span><span class="p">[](</span><span class="kt">int</span><span class="o">&amp;</span><span class="w"> </span><span class="n">input</span><span class="p">,</span><span class="w"> </span><span class="n">tf</span><span class="o">::</span><span class="n">Pipeflow</span><span class="o">&amp;</span><span class="w"> </span><span class="n">pf</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;token=%lu, line=%lu</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span><span class="w"> </span><span class="n">pf</span><span class="p">.</span><span class="n">token</span><span class="p">(),</span><span class="w"> </span><span class="n">pf</span><span class="p">.</span><span class="n">line</span><span class="p">());</span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">to_string</span><span class="p">(</span><span class="n">input</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">100</span><span class="p">);</span>
<span class="w">  </span><span class="p">}</span>
<span class="p">);</span></pre>
          </div></section>
          <section class="m-doc-details" id="a2548e58af071bf1dbbbc945c84f237c9"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename T&gt;
              </div>
              <span class="m-doc-wrap-bumper">T* tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a2548e58af071bf1dbbbc945c84f237c9" class="m-doc-self">cuda_malloc_device</a>(</span><span class="m-doc-wrap">size_t N,
              int d)</span></span>
            </h3>
            <p>allocates memory on the given device for holding <code>N</code> elements of type <code>T</code></p>
<p>The function calls <code>cudaMalloc</code> to allocate <code>N*sizeof(T)</code> bytes of memory on the given device <code>d</code> and returns a pointer to the starting address of the device memory.</p>
          </div></section>
          <section class="m-doc-details" id="a76f4996669b2e81004749edbd3013d1a"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename T&gt;
              </div>
              <span class="m-doc-wrap-bumper">T* tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a76f4996669b2e81004749edbd3013d1a" class="m-doc-self">cuda_malloc_device</a>(</span><span class="m-doc-wrap">size_t N)</span></span>
            </h3>
            <p>allocates memory on the current device associated with the caller</p>
<p>The function calls malloc_device from the current device associated with the caller.</p>
          </div></section>
          <section class="m-doc-details" id="ad289846c38e3f122e1315d906243fc8b"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename T&gt;
              </div>
              <span class="m-doc-wrap-bumper">T* tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#ad289846c38e3f122e1315d906243fc8b" class="m-doc-self">cuda_malloc_shared</a>(</span><span class="m-doc-wrap">size_t N)</span></span>
            </h3>
            <p>allocates shared memory for holding <code>N</code> elements of type <code>T</code></p>
<p>The function calls <code>cudaMallocManaged</code> to allocate <code>N*sizeof(T)</code> bytes of memory and returns a pointer to the starting address of the shared memory.</p>
          </div></section>
          <section class="m-doc-details" id="ac7a8fe7456b888d6072ba94783c5003c"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename T&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#ac7a8fe7456b888d6072ba94783c5003c" class="m-doc-self">cuda_free</a>(</span><span class="m-doc-wrap">T* ptr,
              int d)</span></span>
            </h3>
            <p>frees memory on the GPU device</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">T</td>
                  <td>pointer type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>ptr</td>
                  <td>device pointer to memory to free</td>
                </tr>
                <tr>
                  <td>d</td>
                  <td>device context identifier</td>
                </tr>
              </tbody>
            </table>
<p>This method calls <code>cudaFree</code> to free the memory space pointed to by <code>ptr</code> using the given device context.</p>
          </div></section>
          <section class="m-doc-details" id="ae174a3a49b91ef21554dac16806f0d72"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename T&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#ae174a3a49b91ef21554dac16806f0d72" class="m-doc-self">cuda_free</a>(</span><span class="m-doc-wrap">T* ptr)</span></span>
            </h3>
            <p>frees memory on the GPU device</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">T</td>
                  <td>pointer type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>ptr</td>
                  <td>device pointer to memory to free</td>
                </tr>
              </tbody>
            </table>
<p>This method calls <code>cudaFree</code> to free the memory space pointed to by <code>ptr</code> using the current device context of the caller.</p>
          </div></section>
          <section class="m-doc-details" id="aa4266474b921f8ed7d9ec8071fded2a4"><div>
            <h3>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#aa4266474b921f8ed7d9ec8071fded2a4" class="m-doc-self">cuda_memcpy_async</a>(</span><span class="m-doc-wrap">cudaStream_t stream,
              void* dst,
              const void* src,
              size_t count)</span></span>
            </h3>
            <p>copies data between host and device asynchronously through a stream</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">stream</td>
                  <td>stream identifier</td>
                </tr>
                <tr>
                  <td>dst</td>
                  <td>destination memory address</td>
                </tr>
                <tr>
                  <td>src</td>
                  <td>source memory address</td>
                </tr>
                <tr>
                  <td>count</td>
                  <td>size in bytes to copy</td>
                </tr>
              </tbody>
            </table>
<p>The method calls <code>cudaMemcpyAsync</code> with the given <code>stream</code> using <code>cudaMemcpyDefault</code> to infer the memory space of the source and the destination pointers. The memory areas must not overlap.</p>
          </div></section>
          <section class="m-doc-details" id="a6615554d2954e895755411ee444d9760"><div>
            <h3>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a6615554d2954e895755411ee444d9760" class="m-doc-self">cuda_memset_async</a>(</span><span class="m-doc-wrap">cudaStream_t stream,
              void* devPtr,
              int value,
              size_t count)</span></span>
            </h3>
            <p>initializes or sets GPU memory to the given value byte by byte</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">stream</td>
                  <td>stream identifier</td>
                </tr>
                <tr>
                  <td>devPtr</td>
                  <td>pointer to GPU memory</td>
                </tr>
                <tr>
                  <td>value</td>
                  <td>value to set for each byte of the specified memory</td>
                </tr>
                <tr>
                  <td>count</td>
                  <td>size in bytes to set</td>
                </tr>
              </tbody>
            </table>
<p>The method calls <code>cudaMemsetAsync</code> with the given <code>stream</code> to fill the first <code>count</code> bytes of the memory area pointed to by <code>devPtr</code> with the constant byte value <code>value</code>.</p>
          </div></section>
          <section class="m-doc-details" id="a2ff1cf81426c856fc6db1f6ead47878f"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a2ff1cf81426c856fc6db1f6ead47878f" class="m-doc-self">cuda_single_task</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              C c)</span></span>
            </h3>
            <p>runs a callable asynchronously using one kernel thread</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>closure type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>c</td>
                  <td>closure to run by one kernel thread</td>
                </tr>
              </tbody>
            </table>
<p>The function launches a single kernel thread to run the given callable through the stream in the execution policy object.</p>
          </div></section>
          <section class="m-doc-details" id="a7c449cec0b93503b8280d05add35e9f4"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a7c449cec0b93503b8280d05add35e9f4" class="m-doc-self">cuda_for_each</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              C c)</span></span>
            </h3>
            <p>performs asynchronous parallel iterations over a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy object</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>c</td>
                  <td>unary operator to apply to each dereferenced iterator</td>
                </tr>
              </tbody>
            </table>
<p>This function is equivalent to a parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="k">for</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">itr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">first</span><span class="p">;</span><span class="w"> </span><span class="n">itr</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="n">itr</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="n">c</span><span class="p">(</span><span class="o">*</span><span class="n">itr</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a01ad7ce62fa6f42f2f2fbff3659b7884"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a01ad7ce62fa6f42f2f2fbff3659b7884" class="m-doc-self">cuda_for_each_index</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              I inc,
              C c)</span></span>
            </h3>
            <p>performs asynchronous parallel iterations over an index-based range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input index type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy object</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>index to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>index to the end of the range</td>
                </tr>
                <tr>
                  <td>inc</td>
                  <td>step size between successive iterations</td>
                </tr>
                <tr>
                  <td>c</td>
                  <td>unary operator to apply to each index</td>
                </tr>
              </tbody>
            </table>
<p>This function is equivalent to a parallel execution of the following loops on a GPU:</p><pre class="m-code"><span class="c1">// inc is positive [first, last)</span>
<span class="k">for</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">i</span><span class="o">=</span><span class="n">first</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">&lt;</span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">+=</span><span class="n">inc</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="n">c</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="p">}</span>

<span class="c1">// inc is negative [first, last)</span>
<span class="k">for</span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">i</span><span class="o">=</span><span class="n">first</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">&gt;</span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">+=</span><span class="n">inc</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="n">c</span><span class="p">(</span><span class="n">i</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a3ed764530620a419e3400e1f9ab6c956"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a3ed764530620a419e3400e1f9ab6c956" class="m-doc-self">cuda_transform</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op)</span></span>
            </h3>
            <p>performs asynchronous parallel transforms over a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>unary operator to apply to transform each item</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">output</span><span class="o">++</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="abdcb5b755f7ace2aa452541d5bf93b5f"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I1, typename I2, typename O, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#abdcb5b755f7ace2aa452541d5bf93b5f" class="m-doc-self">cuda_transform</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I1 first1,
              I1 last1,
              I2 first2,
              O output,
              C op)</span></span>
            </h3>
            <p>performs asynchronous parallel transforms over two ranges of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I1</td>
                  <td>first input iterator type</td>
                </tr>
                <tr>
                  <td>I2</td>
                  <td>second input iterator type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>binary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first1</td>
                  <td>iterator to the beginning of the first range</td>
                </tr>
                <tr>
                  <td>last1</td>
                  <td>iterator to the end of the first range</td>
                </tr>
                <tr>
                  <td>first2</td>
                  <td>iterator to the beginning of the second range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>binary operator to apply to transform each pair of items</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first1</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last1</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">output</span><span class="o">++</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">first1</span><span class="o">++</span><span class="p">,</span><span class="w"> </span><span class="o">*</span><span class="n">first2</span><span class="o">++</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a8a872d2a0ac73a676713cb5be5aa688c"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename T, typename O&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a8a872d2a0ac73a676713cb5be5aa688c" class="m-doc-self">cuda_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O op,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous parallel reduction over a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>T</td>
                  <td>value type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>binary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>res</td>
                  <td>pointer to the result</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>binary operator to apply to reduce elements</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">res</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">res</span><span class="p">,</span><span class="w"> </span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a492e8410db032a0273a99dd905486161"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename T, typename O&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a492e8410db032a0273a99dd905486161" class="m-doc-self">cuda_uninitialized_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O op,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous parallel reduction over a range of items without an initial value</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>T</td>
                  <td>value type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>binary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>res</td>
                  <td>pointer to the result</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>binary operator to apply to reduce elements</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="o">*</span><span class="n">result</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">;</span><span class="w">  </span><span class="c1">// no initial values participate in the loop</span>
<span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">result</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">result</span><span class="p">,</span><span class="w"> </span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">);</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a4463d06240d608bc31d8b3546a851e4e"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename T, typename O, typename U&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a4463d06240d608bc31d8b3546a851e4e" class="m-doc-self">cuda_transform_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O bop,
              U uop,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous parallel reduction over a range of transformed items with an initial value</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>T</td>
                  <td>value type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>binary operator type</td>
                </tr>
                <tr>
                  <td>U</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>res</td>
                  <td>pointer to the result</td>
                </tr>
                <tr>
                  <td>bop</td>
                  <td>binary operator to apply to reduce elements</td>
                </tr>
                <tr>
                  <td>uop</td>
                  <td>unary operator to apply to transform elements</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">result</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">bop</span><span class="p">(</span><span class="o">*</span><span class="n">result</span><span class="p">,</span><span class="w"> </span><span class="n">uop</span><span class="p">(</span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">));</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="aa451668b7a0a3abf385cf2abebed8962"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename T, typename O, typename U&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#aa451668b7a0a3abf385cf2abebed8962" class="m-doc-self">cuda_uninitialized_transform_reduce</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              T* res,
              O bop,
              U uop,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous parallel reduction over a range of transformed items without an initial value</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>T</td>
                  <td>value type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>binary operator type</td>
                </tr>
                <tr>
                  <td>U</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>res</td>
                  <td>pointer to the result</td>
                </tr>
                <tr>
                  <td>bop</td>
                  <td>binary operator to apply to reduce elements</td>
                </tr>
                <tr>
                  <td>uop</td>
                  <td>unary operator to apply to transform elements</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This method is equivalent to the parallel execution of the following loop on a GPU:</p><pre class="m-code"><span class="o">*</span><span class="n">result</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">uop</span><span class="p">(</span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">);</span><span class="w">  </span><span class="c1">// no initial values participate in the loop</span>
<span class="k">while</span><span class="w"> </span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="o">*</span><span class="n">result</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">bop</span><span class="p">(</span><span class="o">*</span><span class="n">result</span><span class="p">,</span><span class="w"> </span><span class="n">uop</span><span class="p">(</span><span class="o">*</span><span class="n">first</span><span class="o">++</span><span class="p">));</span>
<span class="p">}</span></pre>
          </div></section>
          <section class="m-doc-details" id="a2e1b44c84a09e0a8495a611cb9a7ea40"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a2e1b44c84a09e0a8495a611cb9a7ea40" class="m-doc-self">cuda_inclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous inclusive scan over a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>binary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the input range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the input range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>binary operator to apply to scan</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="afa4aa760ddb6efbda1b9bab505ad5baf"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O, typename C, typename U&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#afa4aa760ddb6efbda1b9bab505ad5baf" class="m-doc-self">cuda_transform_inclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C bop,
              U uop,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous inclusive scan over a range of transformed items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>binary operator type</td>
                </tr>
                <tr>
                  <td>U</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the input range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the input range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>bop</td>
                  <td>binary operator to apply to scan</td>
                </tr>
                <tr>
                  <td>uop</td>
                  <td>unary operator to apply to transform each item before scan</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="aeb391c40120844318fd715b8c3a716bb"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#aeb391c40120844318fd715b8c3a716bb" class="m-doc-self">cuda_exclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C op,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous exclusive scan over a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>binary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the input range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the input range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>binary operator to apply to scan</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="a2e739895c1c73538967af060ca714366"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O, typename C, typename U&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a2e739895c1c73538967af060ca714366" class="m-doc-self">cuda_transform_exclusive_scan</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              O output,
              C bop,
              U uop,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous exclusive scan over a range of transformed items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>output iterator</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>binary operator type</td>
                </tr>
                <tr>
                  <td>U</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the input range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the input range</td>
                </tr>
                <tr>
                  <td>output</td>
                  <td>iterator to the beginning of the output range</td>
                </tr>
                <tr>
                  <td>bop</td>
                  <td>binary operator to apply to scan</td>
                </tr>
                <tr>
                  <td>uop</td>
                  <td>unary operator to apply to transform each item before scan</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
          </div></section>
          <section class="m-doc-details" id="aa84d4c68d2cbe9f6efc4a1eb1a115458"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename a_keys_it, typename a_vals_it, typename b_keys_it, typename b_vals_it, typename c_keys_it, typename c_vals_it, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#aa84d4c68d2cbe9f6efc4a1eb1a115458" class="m-doc-self">cuda_merge_by_key</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              a_keys_it a_keys_first,
              a_keys_it a_keys_last,
              a_vals_it a_vals_first,
              b_keys_it b_keys_first,
              b_keys_it b_keys_last,
              b_vals_it b_vals_first,
              c_keys_it c_keys_first,
              c_vals_it c_vals_first,
              C comp,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous key-value merge over a range of keys and values</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>a_keys_it</td>
                  <td>first key iterator type</td>
                </tr>
                <tr>
                  <td>a_vals_it</td>
                  <td>first value iterator type</td>
                </tr>
                <tr>
                  <td>b_keys_it</td>
                  <td>second key iterator type</td>
                </tr>
                <tr>
                  <td>b_vals_it</td>
                  <td>second value iterator type</td>
                </tr>
                <tr>
                  <td>c_keys_it</td>
                  <td>output key iterator type</td>
                </tr>
                <tr>
                  <td>c_vals_it</td>
                  <td>output value iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>a_keys_first</td>
                  <td>iterator to the beginning of the first key range</td>
                </tr>
                <tr>
                  <td>a_keys_last</td>
                  <td>iterator to the end of the first key range</td>
                </tr>
                <tr>
                  <td>a_vals_first</td>
                  <td>iterator to the beginning of the first value range</td>
                </tr>
                <tr>
                  <td>b_keys_first</td>
                  <td>iterator to the beginning of the second key range</td>
                </tr>
                <tr>
                  <td>b_keys_last</td>
                  <td>iterator to the end of the second key range</td>
                </tr>
                <tr>
                  <td>b_vals_first</td>
                  <td>iterator to the beginning of the second value range</td>
                </tr>
                <tr>
                  <td>c_keys_first</td>
                  <td>iterator to the beginning of the output key range</td>
                </tr>
                <tr>
                  <td>c_vals_first</td>
                  <td>iterator to the beginning of the output value range</td>
                </tr>
                <tr>
                  <td>comp</td>
                  <td>comparator</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>Performs a key-value merge that copies elements from <code>[a_keys_first, a_keys_last)</code> and <code>[b_keys_first, b_keys_last)</code> into a single range, <code>[c_keys_first, c_keys_first + (a_keys_last - a_keys_first) + (b_keys_last - b_keys_first))</code> such that the resulting range is in ascending key order.</p><p>At the same time, the merge copies elements from the two associated ranges <code>[a_vals_first, a_vals_first + (a_keys_last - a_keys_first))</code> and <code>[b_vals_first, b_vals_first + (b_keys_last - b_keys_first))</code> into a single range, <code>[c_vals_first, c_vals_first + (a_keys_last - a_keys_first) + (b_keys_last - b_keys_first))</code> such that the resulting range is in ascending order implied by each input element&#x27;s associated key.</p><p>For example, assume:</p><ul><li><code>a_keys</code> = {1, 8};</li><li><code>a_vals</code> = {2, 1};</li><li><code>b_keys</code> = {3, 7};</li><li><code>b_vals</code> = {3, 4};</li></ul><p>After the merge, we have:</p><ul><li><code>c_keys</code> = {1, 3, 7, 8}</li><li><code>c_vals</code> = {2, 3, 4, 1}</li></ul>
          </div></section>
          <section class="m-doc-details" id="a37ec481149c2f01669353033d75ed72a"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename a_keys_it, typename b_keys_it, typename c_keys_it, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a37ec481149c2f01669353033d75ed72a" class="m-doc-self">cuda_merge</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              a_keys_it a_keys_first,
              a_keys_it a_keys_last,
              b_keys_it b_keys_first,
              b_keys_it b_keys_last,
              c_keys_it c_keys_first,
              C comp,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous key-only merge over a range of keys</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>a_keys_it</td>
                  <td>first key iterator type</td>
                </tr>
                <tr>
                  <td>b_keys_it</td>
                  <td>second key iterator type</td>
                </tr>
                <tr>
                  <td>c_keys_it</td>
                  <td>output key iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>a_keys_first</td>
                  <td>iterator to the beginning of the first key range</td>
                </tr>
                <tr>
                  <td>a_keys_last</td>
                  <td>iterator to the end of the first key range</td>
                </tr>
                <tr>
                  <td>b_keys_first</td>
                  <td>iterator to the beginning of the second key range</td>
                </tr>
                <tr>
                  <td>b_keys_last</td>
                  <td>iterator to the end of the second key range</td>
                </tr>
                <tr>
                  <td>c_keys_first</td>
                  <td>iterator to the beginning of the output key range</td>
                </tr>
                <tr>
                  <td>comp</td>
                  <td>comparator</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This function is equivalent to <a href="namespacetf.html#aa84d4c68d2cbe9f6efc4a1eb1a115458" class="m-doc">tf::<wbr />cuda_merge_by_key</a> without values.</p>
          </div></section>
          <section class="m-doc-details" id="a9c69906a4dfd1e2d0cd7ed496d29dafd"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename K, typename V = cudaEmpty&gt;
              </div>
              <span class="m-doc-wrap-bumper">unsigned tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a9c69906a4dfd1e2d0cd7ed496d29dafd" class="m-doc-self">cuda_sort_buffer_size</a>(</span><span class="m-doc-wrap">unsigned count)</span></span>
            </h3>
            <p>queries the buffer size in bytes needed to call sort kernels for the given number of elements</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>K</td>
                  <td>key type</td>
                </tr>
                <tr>
                  <td>V</td>
                  <td>value type (default tf::cudaEmpty)</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>count</td>
                  <td>number of keys/values to sort</td>
                </tr>
              </tbody>
            </table>
<p>The function is used to allocate a buffer for calling <a href="namespacetf.html#a06804cb1598e965febc7bd35fc0fbbb0" class="m-doc">tf::<wbr />cuda_sort</a>.</p>
          </div></section>
          <section class="m-doc-details" id="a3461b9179221dd7230ce2a0e45156c7f"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename K_it, typename V_it, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a3461b9179221dd7230ce2a0e45156c7f" class="m-doc-self">cuda_sort_by_key</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              K_it k_first,
              K_it k_last,
              V_it v_first,
              C comp,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous key-value sort on a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>K_it</td>
                  <td>key iterator type</td>
                </tr>
                <tr>
                  <td>V_it</td>
                  <td>value iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>k_first</td>
                  <td>iterator to the beginning of the key range</td>
                </tr>
                <tr>
                  <td>k_last</td>
                  <td>iterator to the end of the key range</td>
                </tr>
                <tr>
                  <td>v_first</td>
                  <td>iterator to the beginning of the value range</td>
                </tr>
                <tr>
                  <td>comp</td>
                  <td>binary comparator</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>Sorts key-value elements in <code>[k_first, k_last)</code> and <code>[v_first, v_first + (k_last - k_first))</code> into ascending key order using the given comparator <code>comp</code>. If <code>i</code> and <code>j</code> are any two valid iterators in <code>[k_first, k_last)</code> such that <code>i</code> precedes <code>j</code>, and <code>p</code> and <code>q</code> are iterators in <code>[v_first, v_first + (k_last - k_first))</code> corresponding to <code>i</code> and <code>j</code> respectively, then <code>comp(*j, *i)</code> evaluates to <code>false</code>.</p><p>For example, assume:</p><ul><li><code>keys</code> are <code>{1, 4, 2, 8, 5, 7}</code></li><li><code>values</code> are <code>{&#x27;a&#x27;, &#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;e&#x27;, &#x27;f&#x27;}</code></li></ul><p>After sort:</p><ul><li><code>keys</code> are <code>{1, 2, 4, 5, 7, 8}</code></li><li><code>values</code> are <code>{&#x27;a&#x27;, &#x27;c&#x27;, &#x27;b&#x27;, &#x27;e&#x27;, &#x27;f&#x27;, &#x27;d&#x27;}</code></li></ul>
          </div></section>
          <section class="m-doc-details" id="a06804cb1598e965febc7bd35fc0fbbb0"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename K_it, typename C&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a06804cb1598e965febc7bd35fc0fbbb0" class="m-doc-self">cuda_sort</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              K_it k_first,
              K_it k_last,
              C comp,
              void* buf)</span></span>
            </h3>
            <p>performs asynchronous key-only sort on a range of items</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>K_it</td>
                  <td>key iterator type</td>
                </tr>
                <tr>
                  <td>C</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>k_first</td>
                  <td>iterator to the beginning of the key range</td>
                </tr>
                <tr>
                  <td>k_last</td>
                  <td>iterator to the end of the key range</td>
                </tr>
                <tr>
                  <td>comp</td>
                  <td>binary comparator</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the temporary buffer</td>
                </tr>
              </tbody>
            </table>
<p>This function is equivalent to <a href="namespacetf.html#a3461b9179221dd7230ce2a0e45156c7f" class="m-doc">tf::<wbr />cuda_sort_by_key</a> without values.</p>
          </div></section>
          <section class="m-doc-details" id="a5f9dabd7c5d0fa5166cf76d9fa5a038e"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename U&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a5f9dabd7c5d0fa5166cf76d9fa5a038e" class="m-doc-self">cuda_find_if</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              U op)</span></span>
            </h3>
            <p>finds the index of the first element that satisfies the given criteria</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>U</td>
                  <td>unary operator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>idx</td>
                  <td>pointer to the index of the found element</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>unary operator which returns <code>true</code> for the required element</td>
                </tr>
              </tbody>
            </table>
<p>The function launches kernels asynchronously to find the index <code>idx</code> of the first element in the range <code>[first, last)</code> such that <code>op(*(first+idx))</code> is true. This is equivalent to the parallel execution of the following loop:</p><pre class="m-code"><span class="kt">unsigned</span><span class="w"> </span><span class="n">idx</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="k">for</span><span class="p">(;</span><span class="w"> </span><span class="n">first</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">first</span><span class="p">,</span><span class="w"> </span><span class="o">++</span><span class="n">idx</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">first</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="k">return</span><span class="w"> </span><span class="n">idx</span><span class="p">;</span>
<span class="w">  </span><span class="p">}</span>
<span class="p">}</span>
<span class="k">return</span><span class="w"> </span><span class="n">idx</span><span class="p">;</span></pre>
          </div></section>
          <section class="m-doc-details" id="a572c13198191c46765264f8afabe2e9f"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a572c13198191c46765264f8afabe2e9f" class="m-doc-self">cuda_min_element</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              O op,
              void* buf)</span></span>
            </h3>
            <p>finds the index of the minimum element in a range</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy object</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>idx</td>
                  <td>solution index of the minimum element</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>comparison function object</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the buffer</td>
                </tr>
              </tbody>
            </table>
<p>The function launches kernels asynchronously to find the smallest element in the range <code>[first, last)</code> using the given comparator <code>op</code>. You need to provide a buffer that holds at least tf::cuda_min_element_bufsz bytes for internal use. The function is equivalent to a parallel execution of the following loop:</p><pre class="m-code"><span class="k">if</span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="k">return</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
<span class="k">auto</span><span class="w"> </span><span class="n">smallest</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">first</span><span class="p">;</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">it</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">next</span><span class="p">(</span><span class="n">first</span><span class="p">);</span><span class="w"> </span><span class="n">it</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">it</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">it</span><span class="p">,</span><span class="w"> </span><span class="o">*</span><span class="n">smallest</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="n">smallest</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">it</span><span class="p">;</span>
<span class="w">  </span><span class="p">}</span>
<span class="p">}</span>
<span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">distance</span><span class="p">(</span><span class="n">first</span><span class="p">,</span><span class="w"> </span><span class="n">smallest</span><span class="p">);</span></pre>
          </div></section>
          <section class="m-doc-details" id="a3fc577fd0a8f127770bcf68bc56c073e"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P, typename I, typename O&gt;
              </div>
              <span class="m-doc-wrap-bumper">void tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a3fc577fd0a8f127770bcf68bc56c073e" class="m-doc-self">cuda_max_element</a>(</span><span class="m-doc-wrap">P&amp;&amp; p,
              I first,
              I last,
              unsigned* idx,
              O op,
              void* buf)</span></span>
            </h3>
            <p>finds the index of the maximum element in a range</p>
            <table class="m-table m-fullwidth m-flat">
              <thead>
                <tr><th colspan="2">Template parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td style="width: 1%">P</td>
                  <td>execution policy type</td>
                </tr>
                <tr>
                  <td>I</td>
                  <td>input iterator type</td>
                </tr>
                <tr>
                  <td>O</td>
                  <td>comparator type</td>
                </tr>
              </tbody>
              <thead>
                <tr><th colspan="2">Parameters</th></tr>
              </thead>
              <tbody>
                <tr>
                  <td>p</td>
                  <td>execution policy object</td>
                </tr>
                <tr>
                  <td>first</td>
                  <td>iterator to the beginning of the range</td>
                </tr>
                <tr>
                  <td>last</td>
                  <td>iterator to the end of the range</td>
                </tr>
                <tr>
                  <td>idx</td>
                  <td>solution index of the maximum element</td>
                </tr>
                <tr>
                  <td>op</td>
                  <td>comparison function object</td>
                </tr>
                <tr>
                  <td>buf</td>
                  <td>pointer to the buffer</td>
                </tr>
              </tbody>
            </table>
<p>The function launches kernels asynchronously to find the largest element in the range <code>[first, last)</code> using the given comparator <code>op</code>. You need to provide a buffer that holds at least tf::cuda_max_element_bufsz bytes for internal use. The function is equivalent to a parallel execution of the following loop:</p><pre class="m-code"><span class="k">if</span><span class="p">(</span><span class="n">first</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">last</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="k">return</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
<span class="k">auto</span><span class="w"> </span><span class="n">largest</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">first</span><span class="p">;</span>
<span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="k">auto</span><span class="w"> </span><span class="n">it</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">next</span><span class="p">(</span><span class="n">first</span><span class="p">);</span><span class="w"> </span><span class="n">it</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">last</span><span class="p">;</span><span class="w"> </span><span class="o">++</span><span class="n">it</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w">  </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">op</span><span class="p">(</span><span class="o">*</span><span class="n">largest</span><span class="p">,</span><span class="w"> </span><span class="o">*</span><span class="n">it</span><span class="p">))</span><span class="w"> </span><span class="p">{</span>
<span class="w">    </span><span class="n">largest</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">it</span><span class="p">;</span>
<span class="w">  </span><span class="p">}</span>
<span class="p">}</span>
<span class="k">return</span><span class="w"> </span><span class="n">std</span><span class="o">::</span><span class="n">distance</span><span class="p">(</span><span class="n">first</span><span class="p">,</span><span class="w"> </span><span class="n">largest</span><span class="p">);</span></pre>
          </div></section>
          <section class="m-doc-details" id="a06790e5f6898894392f247309626e1b4"><div>
            <h3>
              <span class="m-doc-wrap-bumper">const char* tf::<wbr /></span><span class="m-doc-wrap"><span class="m-doc-wrap-bumper"><a href="#a06790e5f6898894392f247309626e1b4" class="m-doc-self">version</a>(</span><span class="m-doc-wrap">) <span class="m-label m-primary">constexpr</span></span></span>
            </h3>
            <p>queries the version information in a string format <code>major.minor.patch</code></p>
<p>Release notes are available here: <a href="https://taskflow.github.io/taskflow/Releases.html">https:/<wbr />/<wbr />taskflow.github.io/<wbr />taskflow/<wbr />Releases.html</a></p>
          </div></section>
        </section>
        <section>
          <h2>Variable documentation</h2>
          <section class="m-doc-details" id="ad3a41adc2499a9519da3e77dc3e9849c"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P&gt;
              </div>
              bool tf::<wbr /><a href="#ad3a41adc2499a9519da3e77dc3e9849c" class="m-doc-self">is_task_params_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if the given type is a task parameter type</p>
<p><a href="classtf_1_1Task.html" class="m-doc">Task</a> parameters can be specified in one of the following types:</p><ul><li><a href="structtf_1_1TaskParams.html" class="m-doc">tf::<wbr />TaskParams</a>: assign the struct of defined parameters</li><li><a href="structtf_1_1DefaultTaskParams.html" class="m-doc">tf::<wbr />DefaultTaskParams</a>: assign nothing</li><li><a href="http://en.cppreference.com/w/cpp/string/basic_string.html" class="m-doc-external">std::<wbr />string</a>: assign a name to the task</li></ul>
          </div></section>
          <section class="m-doc-details" id="aefeb96086f4a99f0e58a0f321012a52c"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename C&gt;
              </div>
              bool tf::<wbr /><a href="#aefeb96086f4a99f0e58a0f321012a52c" class="m-doc-self">is_subflow_task_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if a callable is a dynamic task</p>
<p>A dynamic task is a callable object constructible from std::function&lt;void(Subflow&amp;)&gt;.</p>
          </div></section>
          <section class="m-doc-details" id="a00ca2fc2de0e679a7d9b8039340343df"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename C&gt;
              </div>
              bool tf::<wbr /><a href="#a00ca2fc2de0e679a7d9b8039340343df" class="m-doc-self">is_condition_task_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if a callable is a condition task</p>
<p>A condition task is a callable object constructible from std::function&lt;int()&gt; or std::function&lt;int(tf::Runtime&amp;)&gt;.</p>
          </div></section>
          <section class="m-doc-details" id="a78c40dc8776735b0f2c27cd446481aff"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename C&gt;
              </div>
              bool tf::<wbr /><a href="#a78c40dc8776735b0f2c27cd446481aff" class="m-doc-self">is_multi_condition_task_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if a callable is a multi-condition task</p>
<p>A multi-condition task is a callable object constructible from <a href="http://en.cppreference.com/w/cpp/utility/functional/function.html" class="m-doc-external">std::<wbr />function</a>&lt;tf::SmallVector&lt;int&gt;()&gt; or <a href="http://en.cppreference.com/w/cpp/utility/functional/function.html" class="m-doc-external">std::<wbr />function</a>&lt;tf::SmallVector&lt;int&gt;(tf::Runtime&amp;)&gt;.</p>
          </div></section>
          <section class="m-doc-details" id="a11fc9c98eb3a0d3a9aa55598b1f4d614"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename C&gt;
              </div>
              bool tf::<wbr /><a href="#a11fc9c98eb3a0d3a9aa55598b1f4d614" class="m-doc-self">is_static_task_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if a callable is a static task</p>
<p>A static task is a callable object constructible from std::function&lt;void()&gt; or std::function&lt;void(tf::Runtime&amp;)&gt;.</p>
          </div></section>
          <section class="m-doc-details" id="a73c20705fc54763f195a00b6e626e301"><div>
            <h3>
              <div class="m-doc-template">
                template&lt;typename P&gt;
              </div>
              bool tf::<wbr /><a href="#a73c20705fc54763f195a00b6e626e301" class="m-doc-self">is_partitioner_v</a> <span class="m-label m-primary">constexpr</span>
            </h3>
            <p>determines if a type is a partitioner</p>
<p>A partitioner is a type derived from <a href="classtf_1_1PartitionerBase.html" class="m-doc">tf::<wbr />PartitionerBase</a>.</p>
          </div></section>
        </section>
      </div>
    </div>
  </div>
</article></main>
<div class="m-doc-search" id="search">
  <a href="#!" onclick="return hideSearch()"></a>
  <div class="m-container">
    <div class="m-row">
      <div class="m-col-m-8 m-push-m-2">
        <div class="m-doc-search-header m-text m-small">
          <div><span class="m-label m-default">Tab</span> / <span class="m-label m-default">T</span> to search, <span class="m-label m-default">Esc</span> to close</div>
          <div id="search-symbolcount">&hellip;</div>
        </div>
        <div class="m-doc-search-content">
          <form>
            <input type="search" name="q" id="search-input" placeholder="Loading &hellip;" disabled="disabled" autofocus="autofocus" autocomplete="off" spellcheck="false" />
          </form>
          <noscript class="m-text m-danger m-text-center">Unlike everything else in the docs, the search functionality <em>requires</em> JavaScript.</noscript>
          <div id="search-help" class="m-text m-dim m-text-center">
            <p class="m-noindent">Search for symbols, directories, files, pages or
            modules. You can omit any prefix from the symbol or file path; adding a
            <code>:</code> or <code>/</code> suffix lists all members of given symbol or
            directory.</p>
            <p class="m-noindent">Use <span class="m-label m-dim">&darr;</span>
            / <span class="m-label m-dim">&uarr;</span> to navigate through the list,
            <span class="m-label m-dim">Enter</span> to go.
            <span class="m-label m-dim">Tab</span> autocompletes common prefix, you can
            copy a link to the result using <span class="m-label m-dim">⌘</span>
            <span class="m-label m-dim">L</span> while <span class="m-label m-dim">⌘</span>
            <span class="m-label m-dim">M</span> produces a Markdown link.</p>
          </div>
          <div id="search-notfound" class="m-text m-warning m-text-center">Sorry, nothing was found.</div>
          <ul id="search-results"></ul>
        </div>
      </div>
    </div>
  </div>
</div>
<script src="search-v2.js"></script>
<script src="searchdata-v2.js" async="async"></script>
<footer><nav>
  <div class="m-container">
    <div class="m-row">
      <div class="m-col-l-10 m-push-l-1">
        <p>Taskflow handbook is part of the <a href="https://taskflow.github.io">Taskflow project</a>, copyright © <a href="https://tsung-wei-huang.github.io/">Dr. Tsung-Wei Huang</a>, 2018&ndash;2024.<br />Generated by <a href="https://doxygen.org/">Doxygen</a> 1.9.6 and <a href="https://mcss.mosra.cz/">m.css</a>.</p>
      </div>
    </div>
  </div>
</nav></footer>
</body>
</html>
